https://fivethirtyeight.com/features/can-you-drink-more-coffee-than-your-coworkers/
Riddler Headquarters is a buzzing hive of activity. Mathematicians, statisticians and programmers roam the halls at all hours, proving theorems and calculating probabilities. They’re fueled, of course, by caffeine. But the headquarters has just one coffee pot, along with one unbreakable rule: You finish the joe, you make some mo’.
Specifically, the coffee pot holds one gallon of coffee, and workers fill their mugs from it in sequence. Whoever takes the last drop has to make the next pot, no ifs, ands or buts. Every worker in the office is trying to take as much coffee as he or she can while minimizing the probability of having to refill the pot. Also, this pot is both incredibly heavy and completely opaque, so it’s tough to tell how much remains. That means a worker can’t keep pouring until she sees or feels just a drop left. Anyone stuck refilling the pot becomes so frustrated that they throw their cup to the ground in frustration, so they get no coffee that round.
Congratulations! You’ve just been hired to work at Riddler Headquarters. Submit a number between 0 and 1. (It could be 0.9999, or 0.0001, or 0.5, or 0.12345, and so on.) This is the number of gallons of coffee you will attempt to take from the pot each time you go for a cup. If that amount remains, lucky you, you get to drink it. If less remains, you’re out of luck that round; you must refill the pot, and you get no coffee.
Once I’ve received your submissions, I’ll randomize the order in which you and your colleagues head for the pot. Then I’ll run a lot of simulations — thousands of hypothetical trips to the coffee pot in the Riddler offices. Whoever drinks the most coffee is the ☕ Caffeine King or Queen ☕ of Riddler Headquarters!
In [58]:
%matplotlib inline
In [209]:
from IPython.display import HTML, FileLink
import imageio
import io
import itertools
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import string
def generate_names(n, length=5):
return [''.join(s) for s in np.random.choice(list(string.ascii_uppercase), size=(n, length))]
def generate_list(n, seed_values=None):
"""Create a list of coworkers with their preferred coffee amount.
"""
# names = generate_names(n)
if seed_values is not None:
values = np.random.uniform(low=0.0, high=1.0, size=n-len(seed_values))
values = np.concatenate([values, seed_values], axis=0)
else:
values = np.random.uniform(low=0.0, high=1.0, size=n)
return [{'value': value, 'hit': 0, 'miss': 0, 'coffee': 0} for value in values]
def sample_previous_list(values, weights):
"""Randomly sample with replacement based on the weights from the previous simulation.
"""
values = np.random.choice(values, size=len(values), replace=True, p=weights/weights.sum())
scale = 10**-4
values = values + np.random.normal(loc=0.0, scale=scale, size=len(values))
values = np.clip(values, 0.0, 1.0)
return [{'value': value, 'hit': 0, 'miss': 0, 'coffee': 0} for value in values]
def run_simulation(entries, n_pots=100):
n_pot=0
volume = 1.0
for idx in itertools.cycle(range(len(entries))):
entry = entries[idx]
v = entry['value']
if v >= volume:
# no coffee for you
entry['miss'] += 1
volume = 1.0
n_pot += 1
if n_pot >= n_pots:
break
else:
volume -= v
entry['hit'] += 1
entry['coffee'] += v
df = pd.DataFrame(entries)
df['n_pots'] = n_pots
return df
In [208]:
entries = generate_list(n=10**4)
df = run_simulation(entries, n_rounds=10**8)
df.head()
Out[208]:
In [210]:
df.groupby(pd.qcut(df['value'], 10)).mean()
Out[210]:
In [211]:
def plot_distributions(df):
nrows = 3
fig, axes = plt.subplots(nrows=nrows, ncols=1, figsize=(8, 4.5*nrows))
#cut = pd.qcut(df['value'], 40)
bins = np.linspace(0, 1, 51)
cut = pd.cut(df['value'], bins)
# expected value of coffee
(
df
.assign(coffee=lambda r: r['coffee'] / (r['hit'] + r['miss']))
.groupby(cut)
.mean()
.plot
.bar(y='coffee', ax=axes[0])
)
axes[0].set_title('coffee', fontsize=16)
axes[0].set_ylim([0, 1.0])
# hit ratio
(
df
.assign(hit_ratio=lambda r: r['hit'] / (r['hit'] + r['miss']))
.groupby(cut)
.mean()
.plot
.bar(y='hit_ratio', ax=axes[1])
)
axes[1].set_title('hit ratio', fontsize=16)
axes[1].set_ylim([0, 1.0])
# distribution of request values
df.groupby(cut).count().rename(columns={'n_rounds': 'count'}).plot.bar(y='count', ax=axes[2])
axes[2].set_title('distribution of request values', fontsize=16)
fig.tight_layout()
return fig
plot_distributions(df)
;
Out[211]:
In [213]:
df.sort_values('coffee', ascending=False).head(10)
Out[213]:
In [214]:
df.head(10)
Out[214]:
In [215]:
entries = sample_previous_list(df['value'], df['coffee'])
df = run_simulation(entries, n_rounds=10**7)
df.head(10)
Out[215]:
In [216]:
plot_distributions(df)
;
Out[216]:
In [218]:
def create_simulation_animation(video_filepath):
n_entries = 10**4
n_rounds = n_entries * 10**4
entries = None
n_frames = 90
fps = 3
video_writer = imageio.get_writer(video_filepath, mode='I', fps=fps)
for i in range(n_frames):
if entries is None:
# first time initialization
entries = generate_list(n=n_entries)
else:
# sample the values based on the results of the previous simulation
entries = sample_previous_list(df['value'], df['coffee'])
df = run_simulation(entries, n_rounds=n_rounds)
f = plot_distributions(df)
buf = io.BytesIO()
f.savefig(buf, format='png')
plt.close()
buf.seek(0)
img = imageio.imread(buf)
video_writer.append_data(img)
video_writer.close()
return df, HTML(
"""
<video width="976" height="576" controls>
<source src="{}" type="video/mp4">
</video>
""".format(video_filepath)
)
video_filepath = os.path.expanduser('./simulation_progress.mp4')
df, html = create_simulation_animation(video_filepath)
html
Out[218]:
In [204]:
df.sort_values('coffee', ascending=False).head(10)
Out[204]:
In [206]:
df.head(10)
Out[206]:
In [176]:
FileLink(video_filepath)
Out[176]:
In [ ]: